Diabetes distribution

# Bar chart of the number of respondents at each diabetes status level,
# one coloured bar per level of diabetes_012.
diabetes |>
  count(diabetes_012) |>
  plot_ly(
    x = ~diabetes_012,
    y = ~n,
    type = "bar",
    color = ~diabetes_012
  ) |>
  layout(
    barmode = "stack",
    title = "Diabetes Prevalence",
    # x maps diabetes_012, so the axis is labelled with the status (not age)
    xaxis = list(title = "Diabetes Status"),
    yaxis = list(title = "Count")
  )

Sex & Age distribution

# Grouped bar chart: respondent count per age group, with side-by-side bars
# for each sex.
diabetes |>
  count(sex, age) |>
  plot_ly(
    x = ~age,
    y = ~n,
    type = "bar",
    color = ~sex,
    colors = c("tomato", "skyblue")
  ) |>
  # native pipe here too, matching the other pipelines in this file
  layout(
    barmode = "group",
    title = "Sex and Age Group Distribution",
    xaxis = list(title = "Age Group"),
    yaxis = list(title = "Count")
  )
# Line chart of the diabetes rate (share of rows with diabetes_012 equal to
# "Diabetes") for every age group, one line per sex.
diabetes |>
  group_by(age, sex) |>
  summarise(
    diabetes_rate = mean(diabetes_012 == "Diabetes"),
    # Without this the result stays grouped by age: summarise() prints a
    # message about it, and plot_ly treats dplyr groups as separate line
    # groups, which would split each sex's line at every age.
    .groups = "drop"
  ) |>
  plot_ly(
    x = ~age,
    y = ~diabetes_rate,
    color = ~sex,
    colors = c("tomato", "skyblue"),
    type = "scatter",
    mode = "lines+markers"
  ) |>
  layout(
    title = "Diabetes Prevalence by Sex and Age Group",
    xaxis = list(title = "Age Group"),
    yaxis = list(title = "Diabetes Rate")
  )
## `summarise()` has grouped output by 'age'. You can override using
## the `.groups` argument.

BMI Distribution

# Box plots of BMI, one box (and colour) per diabetes status level.
diabetes |>
  plot_ly(
    type = "box",
    x = ~diabetes_012,
    y = ~bmi,
    color = ~diabetes_012
  ) |>
  layout(
    title = "BMI Distribution by Diabetes Status",
    yaxis = list(title = "BMI"),
    xaxis = list(title = "Diabetes Status")
  )
# Remove the "Prediabetes" rows, then drop any factor levels left unused
diabetes_nd = droplevels(
  filter(diabetes, diabetes_012 != "Prediabetes")
)

# Reshape to long form: one row per respondent and binary variable, keeping
# the diabetes status alongside the variable name and its value
binary_long_nd = pivot_longer(
  select(diabetes_nd, diabetes_012, all_of(binary_vars)),
  cols = all_of(binary_vars),
  names_to = "variable",
  values_to = "value"
)

# Share of each answer within every (variable, diabetes status) cell.
# Normalising inside the status group (rather than across status groups)
# makes prop the fraction of that group's Yes/No respondents who gave the
# answer, so it does not depend on how many respondents each group has.
binary_prop_nd = binary_long_nd |>
  filter(value %in% c("Yes", "No")) |>
  count(variable, diabetes_012, value) |>
  group_by(variable, diabetes_012) |>
  mutate(prop = n / sum(n)) |>
  ungroup()

# proportion "Yes" in each group
binary_yes_nd = binary_prop_nd |>
  filter(value == "Yes") |>
  select(variable, diabetes_012, n, prop)

# proportion "No" in each group
binary_no_nd = binary_prop_nd |>
  filter(value == "No") |>
  select(variable, diabetes_012, n, prop)

Clinical Conditions vs Diabetes

# Binary variables treated as clinical conditions
clinical_vars = c("high_bp", "high_chol", "stroke", "heart_diseaseor_attack")

# Layers common to both clinical charts: dodged bars, percent-formatted
# y axis, minimal theme with slanted x-axis labels
clinical_layers = list(
  geom_col(position = "dodge"),
  scale_y_continuous(labels = scales::percent_format()),
  theme_minimal(),
  theme(axis.text.x = element_text(angle = 30, hjust = 1))
)

# "Yes" answers
p_clinical = ggplot(
  filter(binary_yes_nd, variable %in% clinical_vars),
  aes(x = variable, y = prop, fill = diabetes_012)
) +
  clinical_layers +
  labs(
    title = "Clinical Conditions: Proportion 'Yes' by Diabetes Status",
    x = NULL,
    y = "Proportion 'Yes'",
    fill = "Diabetes status"
  )

# "No" answers
p_clinical_no = ggplot(
  filter(binary_no_nd, variable %in% clinical_vars),
  aes(x = variable, y = prop, fill = diabetes_012)
) +
  clinical_layers +
  labs(
    title = "Clinical Variables: Proportion 'No' by Diabetes Status",
    x = NULL,
    y = "Proportion 'No'",
    fill = "Diabetes status"
  )

# Render both charts as interactive plotly widgets
ggplotly(p_clinical)
ggplotly(p_clinical_no)

Lifestyle vs Diabetes

# Binary variables describing lifestyle behaviours
behavior_vars = c("smoker", "phys_activity", "fruits", "veggies", "hvy_alcohol_consump")

# Layers common to both lifestyle charts: dodged bars, percent-formatted
# y axis, minimal theme with slanted x-axis labels
behavior_layers = list(
  geom_col(position = "dodge"),
  scale_y_continuous(labels = scales::percent_format()),
  theme_minimal(),
  theme(axis.text.x = element_text(angle = 30, hjust = 1))
)

# "Yes" answers
p_behavior = ggplot(
  filter(binary_yes_nd, variable %in% behavior_vars),
  aes(x = variable, y = prop, fill = diabetes_012)
) +
  behavior_layers +
  labs(
    title = "Lifestyle Behaviors: Proportion 'Yes' by Diabetes Status",
    x = NULL,
    y = "Proportion 'Yes'",
    fill = "Diabetes status"
  )

# "No" answers
p_behavior_no = ggplot(
  filter(binary_no_nd, variable %in% behavior_vars),
  aes(x = variable, y = prop, fill = diabetes_012)
) +
  behavior_layers +
  labs(
    title = "Lifestyle Behaviors: Proportion 'No' by Diabetes Status",
    x = NULL,
    y = "Proportion 'No'",
    fill = "Diabetes status"
  )

# Render both charts as interactive plotly widgets
ggplotly(p_behavior)
ggplotly(p_behavior_no)

Healthcare Access and Function vs Diabetes

# Binary variables on healthcare access and functional limitation, plus sex.
# NOTE(review): the tables plotted here are built from "Yes"/"No" values, so
# sex only appears if it is coded that way - confirm.
access_vars = c("chol_check", "any_healthcare", "no_docbc_cost", "diff_walk", "sex")

# Layers common to both access charts: dodged bars, percent-formatted
# y axis, minimal theme with slanted x-axis labels
access_layers = list(
  geom_col(position = "dodge"),
  scale_y_continuous(labels = scales::percent_format()),
  theme_minimal(),
  theme(axis.text.x = element_text(angle = 30, hjust = 1))
)

# "Yes" answers
p_access = ggplot(
  filter(binary_yes_nd, variable %in% access_vars),
  aes(x = variable, y = prop, fill = diabetes_012)
) +
  access_layers +
  labs(
    title = "Access, Functional Limitations, and Sex: Proportion 'Yes'",
    x = NULL,
    y = "Proportion 'Yes'",
    fill = "Diabetes status"
  )

# "No" answers
p_access_no = ggplot(
  filter(binary_no_nd, variable %in% access_vars),
  aes(x = variable, y = prop, fill = diabetes_012)
) +
  access_layers +
  labs(
    title = "Access, Function, Sex: Proportion 'No' by Diabetes Status",
    x = NULL,
    y = "Proportion 'No'",
    fill = "Diabetes status"
  )

# Render both charts as interactive plotly widgets
ggplotly(p_access)
ggplotly(p_access_no)